{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  第六步：用调好的参数进行模型训练\n",
    "参数为 learning_rate =0.1,n_estimators=247,max_depth=6,min_child_weight=2,gamma=0,subsample=0.9,colsample_bytree=0.6,colsample_bylevel = 0.7,objective= 'multi:softprob',reg_alpha=2,reg_lambda=0.5,seed=3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:16:07.282621Z",
     "start_time": "2018-01-04T03:16:06.047198Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda3/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "from xgboost import XGBClassifier\n",
    "import xgboost as xgb\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 读取数据 & 数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:16:10.321018Z",
     "start_time": "2018-01-04T03:16:08.800776Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.5</td>\n",
       "      <td>3</td>\n",
       "      <td>3000</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>750.000000</td>\n",
       "      <td>-1.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>24</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>5465</td>\n",
       "      <td>2732.5</td>\n",
       "      <td>1821.666667</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.0</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>17</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3275</td>\n",
       "      <td>1637.5</td>\n",
       "      <td>1637.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>3350</td>\n",
       "      <td>1675.0</td>\n",
       "      <td>670.000000</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 228 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0        1.5         3   3000           1200.0      750.000000       -1.5   \n",
       "1        1.0         2   5465           2732.5     1821.666667       -1.0   \n",
       "2        1.0         1   2850           1425.0     1425.000000        0.0   \n",
       "3        1.0         1   3275           1637.5     1637.500000        0.0   \n",
       "4        1.0         4   3350           1675.0      670.000000       -3.0   \n",
       "\n",
       "   room_num  Year  Month  Day       ...        walk  walls  war  washer  \\\n",
       "0       4.5  2016      6   24       ...           0      0    0       0   \n",
       "1       3.0  2016      6   12       ...           0      0    0       0   \n",
       "2       2.0  2016      4   17       ...           0      0    0       0   \n",
       "3       2.0  2016      4   18       ...           0      0    0       0   \n",
       "4       5.0  2016      4   28       ...           0      0    1       0   \n",
       "\n",
       "   water  wheelchair  wifi  windows  work  interest_level  \n",
       "0      0           0     0        0     0               1  \n",
       "1      0           0     0        0     0               2  \n",
       "2      0           0     0        0     0               0  \n",
       "3      0           0     0        0     0               2  \n",
       "4      0           0     0        0     0               2  \n",
       "\n",
       "[5 rows x 228 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dpath = './data/'\n",
    "data = pd.read_csv(dpath + 'RentListingInquries_FE_train.csv')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:16:12.937679Z",
     "start_time": "2018-01-04T03:16:10.772507Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2950</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>950.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>24</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3758</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3300</td>\n",
       "      <td>1650.000000</td>\n",
       "      <td>1100.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4900</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 227 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0        1.0         1   2950      1475.000000     1475.000000        0.0   \n",
       "1        1.0         2   2850      1425.000000      950.000000       -1.0   \n",
       "2        1.0         1   3758      1879.000000     1879.000000        0.0   \n",
       "3        1.0         2   3300      1650.000000     1100.000000       -1.0   \n",
       "4        2.0         2   4900      1633.333333     1633.333333        0.0   \n",
       "\n",
       "   room_num  Year  Month  Day  ...   virtual  walk  walls  war  washer  water  \\\n",
       "0       2.0  2016      6   11  ...         0     0      0    0       0      0   \n",
       "1       3.0  2016      6   24  ...         0     0      0    1       0      0   \n",
       "2       2.0  2016      6    3  ...         0     0      0    0       0      0   \n",
       "3       3.0  2016      6   11  ...         0     0      0    0       0      0   \n",
       "4       4.0  2016      4   12  ...         0     0      0    1       0      0   \n",
       "\n",
       "   wheelchair  wifi  windows  work  \n",
       "0           0     0        0     0  \n",
       "1           0     0        0     0  \n",
       "2           0     0        0     0  \n",
       "3           1     0        0     0  \n",
       "4           0     0        0     0  \n",
       "\n",
       "[5 rows x 227 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target = pd.read_csv(dpath + 'RentListingInquries_FE_test.csv')\n",
    "target.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:16:13.918617Z",
     "start_time": "2018-01-04T03:16:13.771560Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>Month</th>\n",
       "      <th>Day</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2950</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>950.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>24</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3758</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3300</td>\n",
       "      <td>1650.000000</td>\n",
       "      <td>1100.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>11</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4900</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3</td>\n",
       "      <td>9000</td>\n",
       "      <td>2250.000000</td>\n",
       "      <td>2250.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2800</td>\n",
       "      <td>1400.000000</td>\n",
       "      <td>933.333333</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>25</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1900</td>\n",
       "      <td>950.000000</td>\n",
       "      <td>1900.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>22</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>3000</td>\n",
       "      <td>1500.000000</td>\n",
       "      <td>1000.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2300</td>\n",
       "      <td>1150.000000</td>\n",
       "      <td>2300.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>19</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>4800</td>\n",
       "      <td>2400.000000</td>\n",
       "      <td>2400.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2195</td>\n",
       "      <td>1097.500000</td>\n",
       "      <td>2195.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>23</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4150</td>\n",
       "      <td>2075.000000</td>\n",
       "      <td>1383.333333</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>5200</td>\n",
       "      <td>2600.000000</td>\n",
       "      <td>1300.000000</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "      <td>4395</td>\n",
       "      <td>2197.500000</td>\n",
       "      <td>1098.750000</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>28</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3050</td>\n",
       "      <td>1525.000000</td>\n",
       "      <td>1525.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2395</td>\n",
       "      <td>1197.500000</td>\n",
       "      <td>2395.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>3950</td>\n",
       "      <td>1975.000000</td>\n",
       "      <td>3950.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2977</td>\n",
       "      <td>1488.500000</td>\n",
       "      <td>2977.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2995</td>\n",
       "      <td>1497.500000</td>\n",
       "      <td>998.333333</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2200</td>\n",
       "      <td>1100.000000</td>\n",
       "      <td>2200.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2295</td>\n",
       "      <td>1147.500000</td>\n",
       "      <td>1147.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "     "
      ],
      "text/plain": [
       "       bathrooms  bedrooms  price  price_bathrooms  price_bedrooms  room_diff  \\\n",
       "0            1.0         1   2950      1475.000000     1475.000000        0.0   \n",
       "1            1.0         2   2850      1425.000000      950.000000       -1.0   \n",
       "2            1.0         1   3758      1879.000000     1879.000000        0.0   \n",
       "3            1.0         2   3300      1650.000000     1100.000000       -1.0   \n",
       "4            2.0         2   4900      1633.333333     1633.333333        0.0   \n",
       "5            3.0         3   9000      2250.000000     2250.000000        0.0   \n",
       "6            1.0         2   2800      1400.000000      933.333333       -1.0   \n",
       "7            1.0         0   1900       950.000000     1900.000000        1.0   \n",
       "8            1.0         2   3000      1500.000000     1000.000000       -1.0   \n",
       "9            1.0         0   2300      1150.000000     2300.000000        1.0   \n",
       "10           1.0         1   4800      2400.000000     2400.000000        0.0   \n",
       "11           1.0         0   2195      1097.500000     2195.000000        1.0   \n",
       "12           1.0         2   4150      2075.000000     1383.333333       -1.0   \n",
       "13           1.0         3   5200      2600.000000     1300.000000       -2.0   \n",
       "14           1.0         3   4395      2197.500000     1098.750000       -2.0   \n",
       "15           1.0         1   3050      1525.000000     1525.000000        0.0   \n",
       "16           1.0         0   2395      1197.500000     2395.000000        1.0   \n",
       "17           1.0         0   3950      1975.000000     3950.000000        1.0   \n",
       "18           1.0         0   2977      1488.500000     2977.000000        1.0   \n",
       "19           1.0         2   2995      1497.500000      998.333333       -1.0   \n",
       "20           1.0         0   2200      1100.000000     2200.000000        1.0   \n",
       "21           1.0         1   2295      1147.500000     1147.500000        0.0   \n",
       "22           2.0         2   3675      1225.000000     1225.000000        0.0   \n",
       "23           1.0         3   4400      2200.000000     1100.000000       -2.0   \n",
       "24           1.0         1   2970      1485.000000     1485.000000        0.0   \n",
       "25           1.0         0   2000      1000.000000     2000.000000        1.0   \n",
       "26           2.0         2   6195      2065.000000     2065.000000        0.0   \n",
       "27           1.0         0   1650       825.000000     1650.000000        1.0   \n",
       "28           1.0         2   3208      1604.000000     1069.333333       -1.0   \n",
       "29           1.0         0   1650       825.000000     1650.000000        1.0   \n",
       "...          ...       ...    ...              ...             ...        ...   \n",
       "74629        1.0         2   2895      1447.500000      965.000000       -1.0   \n",
       "74630        2.0         3   4200      1400.000000     1050.000000       -1.0   \n",
       "74631        1.0         2   3250      1625.000000     1083.333333       -1.0   \n",
       "74632        1.0         1   2895      1447.500000     1447.500000        0.0   \n",
       "74633        2.0         2   4500      1500.000000     1500.000000        0.0   \n",
       "74634        1.0         0   2995      1497.500000     2995.000000        1.0   \n",
       "74635        2.0         2   5750      1916.666667     1916.666667        0.0   \n",
       "74636        1.0         1   4495      2247.500000     2247.500000        0.0   \n",
       "74637        2.0         3   2599       866.333333      649.750000       -1.0   \n",
       "74638        1.0         1   2150      1075.000000     1075.000000        0.0   \n",
       "74639        0.0         3   9000      9000.000000     2250.000000       -3.0   \n",
       "74640        1.0         1   3805      1902.500000     1902.500000        0.0   \n",
       "74641        2.0         3   4150      1383.333333     1037.500000       -1.0   \n",
       "74642        1.0         0   2500      1250.000000     2500.000000        1.0   \n",
       "74643        1.0         3   3795      1897.500000      948.750000       -2.0   \n",
       "74644        2.0         2   6500      2166.666667     2166.666667        0.0   \n",
       "74645        1.0         2   2500      1250.000000      833.333333       -1.0   \n",
       "74646        1.0         1   3495      1747.500000     1747.500000        0.0   \n",
       "74647        1.0         2   3695      1847.500000     1231.666667       -1.0   \n",
       "74648        1.0         2   3025      1512.500000     1008.333333       -1.0   \n",
       "74649        1.0         0   1850       925.000000     1850.000000        1.0   \n",
       "74650        1.0         2   3000      1500.000000     1000.000000       -1.0   \n",
       "74651        2.0         3   9500      3166.666667     2375.000000       -1.0   \n",
       "74652        1.0         2   2100      1050.000000      700.000000       -1.0   \n",
       "74653        1.0         1   4250      2125.000000     2125.000000        0.0   \n",
       "74654        1.0         2   2000      1000.000000      666.666667       -1.0   \n",
       "74655        1.0         1   3649      1824.500000     1824.500000        0.0   \n",
       "74656        1.0         0   2195      1097.500000     2195.000000        1.0   \n",
       "74657        1.0         1   1775       887.500000      887.500000        0.0   \n",
       "74658        1.0         2   2850      1425.000000      950.000000       -1.0   \n",
       "\n",
       "       room_num  Year  Month  Day  ...   virtual  walk  walls  war  washer  \\\n",
       "0           2.0  2016      6   11  ...         0     0      0    0       0   \n",
       "1           3.0  2016      6   24  ...         0     0      0    1       0   \n",
       "2           2.0  2016      6    3  ...         0     0      0    0       0   \n",
       "3           3.0  2016      6   11  ...         0     0      0    0       0   \n",
       "4           4.0  2016      4   12  ...         0     0      0    1       0   \n",
       "5           6.0  2016      4    7  ...         0     0      0    0       0   \n",
       "6           3.0  2016      4   25  ...         0     0      0    0       0   \n",
       "7           1.0  2016      4   22  ...         0     0      0    1       0   \n",
       "8           3.0  2016      4   28  ...         0     0      0    0       0   \n",
       "9           1.0  2016      4   19  ...         0     0      0    0       0   \n",
       "10          2.0  2016      4    6  ...         0     0      0    0       0   \n",
       "11          1.0  2016      4   23  ...         0     0      0    1       0   \n",
       "12          3.0  2016      6    8  ...         0     0      0    0       0   \n",
       "13          4.0  2016      4    6  ...         0     0      0    1       0   \n",
       "14          4.0  2016      4   28  ...         0     0      0    1       0   \n",
       "15          2.0  2016      4   12  ...         0     0      0    1       0   \n",
       "16          1.0  2016      4   14  ...         0     0      0    0       0   \n",
       "17          1.0  2016      4    8  ...         0     0      0    0       0   \n",
       "18          1.0  2016      4   27  ...         0     0      0    0       0   \n",
       "19          3.0  2016      4    6  ...         0     0      0    0       0   \n",
       "20          1.0  2016      6   16  ...         0     0      0    0       0   \n",
       "21          2.0  2016      4   12  ...         0     0      0    0       0   \n",
       "22          4.0  2016      4   12  ...         0     0      0    1       0   \n",
       "23          4.0  2016      4    9  ...         0     0      0    0       0   \n",
       "24          2.0  2016      4   24  ...         0     0      0    1       0   \n",
       "25          1.0  2016      4   23  ...         0     0      0    0       0   \n",
       "26          4.0  2016      4   19  ...         0     0      0    0       0   \n",
       "27          1.0  2016      4    2  ...         0     0      0    0       0   \n",
       "28          3.0  2016      6    6  ...         0     0      0    0       0   \n",
       "29          1.0  2016      4   18  ...         0     0      0    0       0   \n",
       "...         ...   ...    ...  ...  ...       ...   ...    ...  ...     ...   \n",
       "74629       3.0  2016      4   11  ...         0     0      0    1       0   \n",
       "74630       5.0  2016      4    2  ...         0     0      0    0       0   \n",
       "74631       3.0  2016      4   26  ...         0     0      0    0       0   \n",
       "74632       2.0  2016      6   11  ...         0     0      0    1       0   \n",
       "74633       4.0  2016      4   22  ...         0     0      0    0       0   \n",
       "74634       1.0  2016      4    6  ...         0     0      0    0       0   \n",
       "74635       4.0  2016      4   13  ...         0     0      0    0       0   \n",
       "74636       2.0  2016      4    4  ...         0     0      0    0       0   \n",
       "74637       5.0  2016      4   10  ...         0     0      0    0       0   \n",
       "74638       2.0  2016      6    8  ...         0     0      0    0       0   \n",
       "74639       3.0  2016      4   21  ...         0     0      0    0       0   \n",
       "74640       2.0  2016      4   13  ...         0     0      0    0       0   \n",
       "74641       5.0  2016      4    7  ...         0     0      0    0       0   \n",
       "74642       1.0  2016      4   26  ...         0     0      0    0       0   \n",
       "74643       4.0  2016      4   19  ...         0     0      0    0       0   \n",
       "74644       4.0  2016      4   13  ...         0     0      0    0       0   \n",
       "74645       3.0  2016      4   16  ...         0     0      0    0       0   \n",
       "74646       2.0  2016      4   21  ...         0     0      0    0       0   \n",
       "74647       3.0  2016      4   21  ...         0     0      0    0       0   \n",
       "74648       3.0  2016      6   24  ...         0     0      0    0       0   \n",
       "74649       1.0  2016      4    8  ...         0     0      0    0       0   \n",
       "74650       3.0  2016      4   30  ...         0     0      0    0       0   \n",
       "74651       5.0  2016      4   19  ...         0     0      0    0       0   \n",
       "74652       3.0  2016      4    2  ...         0     0      0    1       0   \n",
       "74653       2.0  2016      4   21  ...         0     0      0    1       0   \n",
       "74654       3.0  2016      4   16  ...         0     0      0    1       0   \n",
       "74655       2.0  2016      4    6  ...         0     0      0    0       0   \n",
       "74656       1.0  2016      4   16  ...         0     0      0    1       0   \n",
       "74657       2.0  2016      4   16  ...         0     0      0    0       0   \n",
       "74658       3.0  2016      4   26  ...         0     0      0    0       0   \n",
       "\n",
       "       water  wheelchair  wifi  windows  work  \n",
       "0          0           0     0        0     0  \n",
       "1          0           0     0        0     "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/html": [
       "<b>limit_output extension: Maximum message size of 10000 exceeded with 27129 characters</b>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "target.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:16:15.281617Z",
     "start_time": "2018-01-04T03:16:14.806839Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/anaconda3/lib/python3.6/site-packages/pandas/core/indexing.py:517: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self.obj[item] = s\n"
     ]
    }
   ],
   "source": [
    "#将噪声移除\n",
    "def remove_noise(df):\n",
    "#remove some noise\n",
    "    df= df[df.price < 10000]\n",
    "\n",
    "    df.loc[df[\"bathrooms\"] == 112, \"bathrooms\"] = 1.5\n",
    "    df.loc[df[\"bathrooms\"] == 10, \"bathrooms\"] = 1\n",
    "    df.loc[df[\"bathrooms\"] == 20, \"bathrooms\"] = 2\n",
    "    return df\n",
    "data = remove_noise(data)\n",
    "target = remove_noise(target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 特征编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:16:16.421309Z",
     "start_time": "2018-01-04T03:16:16.352646Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "y_train = data['interest_level']\n",
    "X_train = data.drop('interest_level',axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 模型训练"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "参数为 learning_rate =0.1,n_estimators=247,max_depth=6,min_child_weight=2,gamma=0,subsample=0.9,colsample_bytree=0.6,colsample_bylevel = 0.7,objective= 'multi:softprob',reg_alpha=2,reg_lambda=0.5,seed=3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:18:25.291378Z",
     "start_time": "2018-01-04T03:16:19.811017Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, colsample_bylevel=0.7, colsample_bytree=0.6,\n",
       "       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=6,\n",
       "       min_child_weight=2, missing=None, n_estimators=247, nthread=-1,\n",
       "       objective='multi:softprob', reg_alpha=2, reg_lambda=0.5,\n",
       "       scale_pos_weight=1, seed=3, silent=True, subsample=0.9)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=247,  #第二轮参数调整得到的n_estimators最优值\n",
    "        max_depth=6,\n",
    "        min_child_weight=2,\n",
    "        gamma=0,\n",
    "        subsample=0.9,\n",
    "        colsample_bytree=0.6,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        reg_alpha=2,\n",
    "        reg_lambda=0.5,\n",
    "        seed=3)\n",
    "xgb.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T02:19:42.241727Z",
     "start_time": "2018-01-04T02:19:42.237033Z"
    },
    "collapsed": true
   },
   "source": [
    "# 对测试集进行测试，生成提交文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:22:27.276860Z",
     "start_time": "2018-01-04T03:22:22.525637Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 1, 2, ..., 2, 1, 2])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred = xgb.predict(target)\n",
    "y_test_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-01-04T03:24:51.224067Z",
     "start_time": "2018-01-04T03:24:43.242138Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "target['interest_level'] = y_test_pred\n",
    "target.to_csv('submission.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "nbTranslate": {
   "displayLangs": [
    "*"
   ],
   "hotkey": "alt-t",
   "langInMainMenu": true,
   "sourceLang": "en",
   "targetLang": "fr",
   "useGoogleTranslate": true
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
